Loading libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0           ✔ purrr   0.2.5      
## ✔ tibble  1.4.2           ✔ dplyr   0.7.99.9000
## ✔ tidyr   0.8.1           ✔ stringr 1.3.1      
## ✔ readr   1.1.1           ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readxl)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(dygraphs)
library(ggcorrplot)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor

Citation

citation("tidyverse")
## 
## To cite package 'tidyverse' in publications use:
## 
##   Hadley Wickham (2017). tidyverse: Easily Install and Load the
##   'Tidyverse'. R package version 1.2.1.
##   https://CRAN.R-project.org/package=tidyverse
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {tidyverse: Easily Install and Load the 'Tidyverse'},
##     author = {Hadley Wickham},
##     year = {2017},
##     note = {R package version 1.2.1},
##     url = {https://CRAN.R-project.org/package=tidyverse},
##   }
citation("readxl")
## 
## To cite package 'readxl' in publications use:
## 
##   Hadley Wickham and Jennifer Bryan (2018). readxl: Read Excel
##   Files. R package version 1.1.0.
##   https://CRAN.R-project.org/package=readxl
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {readxl: Read Excel Files},
##     author = {Hadley Wickham and Jennifer Bryan},
##     year = {2018},
##     note = {R package version 1.1.0},
##     url = {https://CRAN.R-project.org/package=readxl},
##   }
citation("xts")
## 
## To cite package 'xts' in publications use:
## 
##   Jeffrey A. Ryan and Joshua M. Ulrich (2018). xts: eXtensible
##   Time Series. R package version 0.11-0.
##   https://CRAN.R-project.org/package=xts
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {xts: eXtensible Time Series},
##     author = {Jeffrey A. Ryan and Joshua M. Ulrich},
##     year = {2018},
##     note = {R package version 0.11-0},
##     url = {https://CRAN.R-project.org/package=xts},
##   }
citation("dygraphs")
## 
## To cite package 'dygraphs' in publications use:
## 
##   Dan Vanderkam, JJ Allaire, Jonathan Owen, Daniel Gromer and
##   Benoit Thieurmel (2018). dygraphs: Interface to 'Dygraphs'
##   Interactive Time Series Charting Library. R package version
##   1.1.1.6. https://CRAN.R-project.org/package=dygraphs
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dygraphs: Interface to 'Dygraphs' Interactive Time Series Charting Library},
##     author = {Dan Vanderkam and JJ Allaire and Jonathan Owen and Daniel Gromer and Benoit Thieurmel},
##     year = {2018},
##     note = {R package version 1.1.1.6},
##     url = {https://CRAN.R-project.org/package=dygraphs},
##   }

Reading Excel spreadsheet.

# Both tables live in the same workbook: the first sheet is read into
# `churn`, the second into `timeseries`.
churn <- read_excel(path = "data/test_dfXL.xlsx", sheet = 1)
timeseries <- read_excel(path = "data/test_dfXL.xlsx", sheet = 2)

Checking tibbles

Main data frame (churn)

head(churn)
## # A tibble: 6 x 10
##      ID START_DATE          END_DATE            LIFETIME RATING PHONE EMAIL
##   <dbl> <dttm>              <dttm>                 <dbl> <chr>  <chr> <chr>
## 1     1 2014-01-06 23:00:00 2014-03-18 23:00:00       71 NONE   N     N    
## 2     2 2013-07-05 00:00:00 2014-07-17 23:00:00      378 NONE   N     N    
## 3     3 2015-06-04 00:00:00 2017-04-17 23:00:00      684 NONE   Y     Y    
## 4     4 2014-01-08 23:00:00 2014-06-26 23:00:00      169 NONE   N     N    
## 5     5 2014-07-23 00:00:00 2014-11-16 23:00:00      117 NONE   N     N    
## 6     6 2017-07-21 00:00:00 2018-07-19 00:00:00      363 D      Y     Y    
## # ... with 3 more variables: CONTRACT <dbl>, COMPLAINT <dbl>,
## #   CHURNED <dbl>
glimpse(churn)
## Observations: 10,000
## Variables: 10
## $ ID         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ...
## $ START_DATE <dttm> 2014-01-06 23:00:00, 2013-07-05 00:00:00, 2015-06-...
## $ END_DATE   <dttm> 2014-03-18 23:00:00, 2014-07-17 23:00:00, 2017-04-...
## $ LIFETIME   <dbl> 71, 378, 684, 169, 117, 363, 571, 563, 772, 184, 39...
## $ RATING     <chr> "NONE", "NONE", "NONE", "NONE", "NONE", "D", "NONE"...
## $ PHONE      <chr> "N", "N", "Y", "N", "N", "Y", "N", "Y", "Y", "N", "...
## $ EMAIL      <chr> "N", "N", "Y", "N", "N", "Y", "N", "Y", "Y", "N", "...
## $ CONTRACT   <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, ...
## $ COMPLAINT  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CHURNED    <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, ...

Time series tibble (used for the dygraphs charts)

head(timeseries)
## # A tibble: 6 x 3
##   Date                  Male Female
##   <dttm>               <dbl>  <dbl>
## 1 2013-01-01 00:00:00 300958 380666
## 2 2013-01-02 00:00:00 300522 379425
## 3 2013-01-03 00:00:00 300194 378252
## 4 2013-01-04 00:00:00 299399 377525
## 5 2013-01-05 00:00:00 298566 376250
## 6 2013-01-06 00:00:00 298790 376519
glimpse(timeseries)
## Observations: 315
## Variables: 3
## $ Date   <dttm> 2013-01-01, 2013-01-02, 2013-01-03, 2013-01-04, 2013-0...
## $ Male   <dbl> 300958, 300522, 300194, 299399, 298566, 298790, 298233,...
## $ Female <dbl> 380666, 379425, 378252, 377525, 376250, 376519, 375724,...
# Load the diamonds example dataset into the workspace and preview its
# first rows.
data("diamonds")
head(diamonds)
## # A tibble: 6 x 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23  Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2 0.21  Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3 0.23  Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4 0.290 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5 0.31  Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6 0.24  Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Histogram of diamond prices using ggplot2's default binning.
ggplot(data = diamonds) +
  geom_histogram(mapping = aes(x = price))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same distribution drawn with base graphics.
hist(diamonds$price)

# Histogram of log-transformed price; the transform is applied inside the
# aesthetic mapping, so the data itself is left untouched.
ggplot(data = diamonds, mapping = aes(x = log(price))) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of price per carat, computed on the fly in the mapping.
ggplot(data = diamonds) +
  geom_histogram(mapping = aes(x = price / carat))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Kernel density curve of price (ggplot2 version).
ggplot(data = diamonds) +
  geom_density(mapping = aes(x = price))

# Base-graphics counterpart: estimate the density of price, then plot it.
plot(density(diamonds$price))

# Plot the LIFETIME column of the churn data against its row position.
plot(churn$LIFETIME)

# Index plot of depth. Observations below 50 or above 75 are drawn in red,
# everything else in black.
with(
  diamonds,
  plot(
    depth,
    main = "Outliers",
    ylab = "Depth (%)",
    col = if_else(depth < 50 | depth > 75, "red", "black")
  )
)

# Scatter plot of price against carat, one colour per cut.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, colour = cut)) +
  geom_point()

# Same relationship with jittered, semi-transparent points to reduce
# overplotting.
ggplot(data = diamonds, mapping = aes(x = carat, y = price, colour = cut)) +
  geom_jitter(alpha = 0.5)

# Box plot of price for each cut, black-and-white theme.
ggplot(data = diamonds) +
  theme_bw() +
  geom_boxplot(mapping = aes(x = cut, y = price))

# Box plot of carat for each cut.
ggplot(data = diamonds) +
  geom_boxplot(mapping = aes(x = cut, y = carat))

# Narrow box plots of price per cut with outlier points hidden. The y scale
# is limited to 0-15000, so rows outside that range are dropped before the
# box statistics are computed (hence the "Removed ... rows" warning).
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 15000)) +
  geom_boxplot(width = 0.25, outlier.shape = NA)
## Warning: Removed 1655 rows containing non-finite values (stat_boxplot).

# Horizontal violin plot of price for each cut.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  coord_flip() +
  geom_violin(fill = "blue")

# Horizontal bar chart of the median price per cut, with bars ordered by
# that median.
diamonds %>%
  group_by(cut) %>%
  summarise(median_price = median(price, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = reorder(cut, median_price), y = median_price)) +
  geom_col(colour = "black", fill = "lavender", width = 0.75) +
  geom_hline(yintercept = 0) +
  coord_flip() +
  labs(y = "Median Price", x = "Cut")

# Number of rows per start month and rating, drawn as one line per rating.
# START_DATE is collapsed to a year-month and converted back to POSIXct so
# it can be used on a continuous date-time axis.
churn %>%
  select(START_DATE, RATING) %>%
  mutate(START_MONTH = as.POSIXct(as.yearmon(START_DATE))) %>%
  group_by(START_MONTH, RATING) %>%
  summarise(total = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = START_MONTH, y = total, colour = RATING)) +
  geom_line()

# Correlogram of the diamonds data. corrgram is called with its namespace
# prefix, so the package must be installed but does not need to be attached.
corrgram::corrgram(diamonds)

# Carat against price, coloured by cut, with price on a square-root axis.
# The transform is applied by the scale (scale_y_sqrt) instead of mapping
# sqrt(price) directly, so the dollar labels show real prices rather than
# their square roots. The y label names the transform correctly (it is a
# square root, not a square).
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_jitter(aes(colour = cut), alpha = 0.5) +
  labs(
    title = "Linear Relationship between Carat and Price",
    subtitle = "Source: diamonds dataset from the ggplot2 package",
    x = "Carat",
    y = "Price (square-root scale)"
  ) +
  scale_y_sqrt(labels = scales::dollar) +
  theme_minimal()

# One panel per cut: carat against price on a square-root axis, with a
# linear fit. Because the transform is done by the scale, the fit is still
# computed on square-root price, while the dollar labels show real prices.
# The y label names the transform correctly (square root, not square).
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Linear Relationship between Carat and Price",
    subtitle = "Source: diamonds dataset from the ggplot2 package",
    x = "Carat",
    y = "Price (square-root scale)"
  ) +
  facet_wrap(~cut, scales = "free_y") +
  scale_y_sqrt(labels = scales::dollar) +
  theme_minimal()

Dygraphs

# Build one xts series per column, indexed by the observation date.
# The values are passed through unshifted so each observation stays on its
# own date. Wrapping them in lead() moved every value one row earlier than
# its date and left the final date as NA.
# NOTE(review): if the one-step shift was intentional, restore lead() and
# document why.
m.xts <- xts(timeseries$Male, order.by = timeseries$Date)
f.xts <- xts(timeseries$Female, order.by = timeseries$Date)

# Combine both series into a single two-column object on the shared index.
all <- cbind(m.xts, f.xts)

# Plain interactive chart, then the same chart with a range selector.
dygraph(all)
dygraph(all) %>%
  dyRangeSelector()